# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from eavic.rules import *
from datetime import datetime
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects the catalog tree and SKU listings of mall.eavic.com.

    Crawl flow:
      1. ``start_requests`` fetches the mall index page.
      2. ``parse_cotalogs`` requests the sub-catalog page for every catalog id
         returned by ``parse_catalog_1``.
      3. ``parse_sku_content_deal`` emits the parsed catalogs, then walks the
         paginated SKU listing of every level-3 catalog and emits the SKUs.

    ``self.batch_no`` and ``self.error_back`` are not defined in this class;
    they are presumably provided by ``BaseSpider`` (confirm against the base).
    """

    name = 'sku'
    index_url = 'https://mall.eavic.com/?format=html'
    # Sub-catalog page of a first-level catalog; placeholder is the catalog id.
    catalogs_url = 'https://mall.eavic.com/sub_catalogs/{}'
    # SKU listing page; placeholders are the catalog id and the page number.
    sku_list_url = 'https://mall.eavic.com/channel/{}.html?page={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any extra arguments to the base spider."""
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Not read anywhere in this class; kept because external code may use it.
        self.page_size = 20

    def _build_list_req(self, url, callback, page=1, category1_name='', category1_id='', category2_id='',
                        category3_id='', mark=False, flag=False):
        """Build a request that carries the crawl context in ``meta``.

        Args:
            url: Address to fetch.
            callback: Method that handles the response.
            page: Page number stored in ``meta['page']``.
            category1_name: Name of the first-level catalog being crawled.
            category1_id: Id of the first-level catalog being crawled.
            category2_id: Id of the second-level catalog (may be empty).
            category3_id: Id of the third-level catalog (may be empty).
            mark: True when the response is a catalog page.
            flag: True when the response is the first page of a SKU listing.

        Returns:
            A ``Request`` with ``dont_filter=True``, the spider's ``error_back``
            as errback and browser-like AJAX headers.
        """
        meta = {
            'batchNo': self.batch_no,
            'category1_name': category1_name,
            'category1_id': category1_id,
            'category2_id': category2_id,
            'category3_id': category3_id,
            'page': page,
            'mark': mark,
            'flag': flag,
        }
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42'
        }
        return Request(url=url,
                       callback=callback,
                       errback=self.error_back,
                       meta=meta,
                       dont_filter=True,
                       headers=headers)

    def start_requests(self):
        """Request the index page, from which the first-level catalogs are read."""
        yield self._build_list_req(self.index_url, self.parse_cotalogs)

    def parse_cotalogs(self, response):
        """Request the sub-catalog page of every first-level catalog.

        ``parse_catalog_1`` is used as a pair of parallel sequences: element 0
        holds the names and element 1 the ids, matched by position.
        """
        parse_level_1 = parse_catalog_1(response)
        names = parse_level_1[0]
        ids = parse_level_1[1]
        for index, catalog_id in enumerate(ids):
            yield self._build_list_req(self.catalogs_url.format(catalog_id), self.parse_sku_content_deal,
                                       category1_name=names[index], category1_id=catalog_id, mark=True)

    def parse_sku_content_deal(self, response):
        """Handle both catalog pages and SKU listing pages.

        The kind of page is taken from the request meta:
          * ``mark`` truthy  -> catalog page: emit the catalogs and schedule the
            first SKU listing page of every level-3 catalog.
          * ``flag`` truthy  -> first SKU listing page: emit its SKUs and
            schedule pages 2..total.
          * otherwise        -> follow-up SKU listing page: emit its SKUs.

        Yields:
            ``Box`` items ('catalog' or 'sku') and follow-up requests.
        """
        meta = response.meta
        category1_name = meta.get('category1_name', '')
        category1_id = meta.get('category1_id', '')
        category2_id = meta.get('category2_id', '')
        category3_id = meta.get('category3_id', '')
        cur_page = meta.get('page', '')
        mark = meta.get('mark')
        flag = meta.get('flag')

        # ---- catalog page -------------------------------------------------
        if mark:
            cats = parse_catalog(response, category1_name, category1_id)
            if cur_page != 1:
                return
            if not cats:
                self.logger.info('空页: cat=%s, page=%s,状态:%s', category3_id, cur_page, mark)
                return

            self.logger.info('品类: count[%s]', len(cats))
            yield Box('catalog', self.batch_no, cats)
            for cat in cats:
                if cat.get('level') != 3:
                    continue
                # Only leaf (level-3) catalogs have a SKU listing to crawl.
                # NOTE(review): category2_id is forwarded as received (empty on
                # catalog requests); the parent of the leaf is not looked up.
                leaf_id = cat.get('catalogId')
                yield self._build_list_req(self.sku_list_url.format(leaf_id, 1),
                                           self.parse_sku_content_deal, page=1,
                                           category1_name=category1_name, category1_id=category1_id,
                                           category2_id=category2_id,
                                           category3_id=leaf_id, flag=True)
            return

        # ---- SKU listing page ---------------------------------------------
        total_pages = None
        if flag:
            total_pages = parse_total_page(response)
            self.logger.info('清单： 总页数: cat=%s, page=%s', category3_id, total_pages)

        sku_list = parse_sku(response)
        if sku_list:
            yield Box('sku', self.batch_no, sku_list)
        else:
            # Report empty pages for every page, not only the first one.
            self.logger.error('空页: %s', response.url)

        if not flag:
            return

        # The page count comes from an external parser; coerce it so that a
        # missing or textual value cannot crash the callback.
        try:
            last_page = int(total_pages)
        except (TypeError, ValueError):
            self.logger.error('invalid total page count: cat=%s, total=%s, url=%s',
                              category3_id, total_pages, response.url)
            return

        for page in range(2, last_page + 1):
            yield self._build_list_req(self.sku_list_url.format(category3_id, page),
                                       self.parse_sku_content_deal,
                                       page=page, category1_name=category1_name, category1_id=category1_id,
                                       category2_id=category2_id,
                                       category3_id=category3_id,
                                       flag=False)
